#### This script produces the table with the models from the original analyses
#### All code lines are directly copied and pasted from the original dataverse files 
#### by Skorge (2021), unless otherwise specified below. 

dt <- fread(here("data_main.csv"))

##########################################################################
###       This part of the code prepares the data for the analysis     ###
##########################################################################


# 1919 PR reform variable used in the analyses
dt[prORmaj13to16==1, prFrom10 := ifelse(year==1910,pr13,
                                        ifelse(year>=1919,1,pr))]


# Notes from the author about this variable:
# 1) As mentioned in Section 4.1, the main analysis excludes municipalities that 
# switched to PR between 1913 and 1916. The prFrom10 variable is therefore set 
# to missing for municipalities that switched to PR between 1913 and 
# 1916 election (prORmaj13to16==1) (see Section 4.1).
# 2) Also as mentioned in Section 4.1, the coding of prFrom10 extends 
# the PR variable to 1910 election (information on electoral system not
# included in the electoral statistics for 1910), by assigning each municipality
# the electoral system it had in place in 1913 (pr13)
# 3) prFrom10 codes all municipalities as 1 (having PR) from the 1919 election 
# and onward, i.e., after the PR reform forcing all municipalities to switch 
# (see discussion in Sections 3.1, 3.2, and 4). Information on electoral system 
# by municipality is not available in the electoral statistics for the 1919 
# and subsequent elections).


# Creating data set for the analysis in Table 1
dt.a <- dt
dt.a[, muni. := muni] # Municipalities for display of regression table

# Vectors of covariates

# First, the author creates the vector of what he calls "electoral demographic" 
# covariates ("v.ctrls.pol", denoted by “D” in Table 1 in the paper).
# These covariates are: 1) Log number of voters eligible to vote in the election 
# (log.elVotersT), 2) percentage women among eligible voters (relElVotersW), 
# 3) log number of representatives in the municipal council (log.repsT),
# 4) log population (log.pop), 5) log population density (log.popDens), --the author
# does not declare controlling for this variable--, and 
# 5) the female fraction of the population (relPopW)
v.ctrls.pol <- c("log.elVotersT","relElVotersW","log.repsT","log.pop","log.popDens","relPopW")

# The author also defines a vector of "other" covariates ("v.ctrls.other",
# denoted by “O” in Table 1 in the paper). This vector includes: 
# 1) Percentage employed in industry, as a percentage of total employment (occupInduPct),
# 2) Percentage employed in services, as a percentage of total employment (occupServicePct),
# 3) Percentage employed in shipping and fisheries, as a percentage of total employment
# (occupSeaPct), and 4) Percentage of the population in nonconformist 
# (Dissenting) religious societies (relDissenterPct).
v.ctrls.other <- c("occupInduPct","occupServicePct","occupSeaPct","relDissenterPct") 

# A vector with both sets of controls
v.ctrls <- c(v.ctrls.pol, v.ctrls.other)



# The author creates a new dataset to make sure that all  regression models
# have the same number of observations

m.getNAs <- lm_robust(as.formula(paste0("relTurnoutW ~ prFrom10 +",
                                        paste0(v.ctrls,collapse="+"))),
                      data=dt.a,
                      clusters=muni,
                      fixed_effects=~muni + year,
                      se_type="stata")

dt.temp <- dt.a[as.numeric(attr(m.getNAs[["fitted.values"]],"names")),]

# Balanced data: keep municipalities present in all 7 elections
dt.temp[, balanced := Max(.N)>6, by=muni]
dt.a <- dt.temp[balanced==1,]

# Municipality-specific time trend variables (linear and squared)
dt.a[, time := as.numeric(as.factor(year))]

inds <- sort(unique(dt.a$muni))
v_time <- paste0("time_",inds)
dt.a[, (v_time) := lapply(inds, function(x) as.numeric(muni == x))]
dt.a[, (v_time) := lapply(.SD, function(x) x*time), .SDcols=v_time]

v_time_sq <- paste0("time_sq_",inds)
dt.a[, (v_time_sq) := lapply(inds, function(x) as.numeric(muni == x))]
dt.a[, (v_time_sq) := lapply(.SD, function(x) x*time*time), .SDcols=v_time_sq]


##########################################################################
###            This part of the code replicates the original           ###
###         estimated models 4 and 6 in Table 1 (p. 9) presented by    ###
###           the author in the paper's main analysis                  ###
##########################################################################

dv <- "relTurnoutW"

# PR, FEs and all covariates
m.1a <- lm_robust(as.formula(paste0(dv," ~ prFrom10 +",
                                    paste0(c(v.ctrls.pol,
                                             v.ctrls.other),collapse="+"))),
                  data=dt.a,
                  clusters=muni,
                  fixed_effects=~muni + year,
                  se_type = "stata")




# PR, FEs, all covariates, and municipality-specific time trends squared
m.1b <- lm_robust(as.formula(paste0(dv," ~ prFrom10 +",
                                    paste0(c(v.ctrls.pol,
                                             v.ctrls.other,
                                             v_time,
                                             v_time_sq),collapse="+"))),
                  data=dt.a,
                  clusters=muni,
                  fixed_effects=~muni + year,
                  se_type = "stata")



##########################################################################
###  This part of the code replicates the model 6 in Table E.1 and     ###
###  model 4 in Table G.1 from the original Supplementary Information  ###
###                     provided by the author                         ###
##########################################################################

#####################################
### Model 6 Table E.1 (p. 7 - SI) ###
#####################################

# Indicator for municipalities present in 1916
v.m1916 <- dt %>% 
  dplyr::filter(year==1916) %>% 
  pull(muni)

dt <- dt %>% 
  mutate(presentIn1916 = ifelse(muni %in% v.m1916, 1, 0))

### Switching between 1913 and 1916 ###

# Number of municipalities that switched from plurality to PR 
# between 1913 and 1916
v.nToPR1916<- dt%>% 
  filter(!((pr13==1 & pr16==1)|(pr13==0 & pr16==0)) & year==1916) %>% 
  dplyr::select(muni) %>% 
  n_distinct() #58

# Municipalities not switching electoral system between 1913 and 1916
dt <- dt %>% 
  mutate(notSwitchBtw13and16 = ifelse(is.na(pr13)|is.na(pr16), 1, 
                                      ifelse(!((pr13==1 & pr16==1)|(pr13==0 & pr16==0)), 0, 1)))

### Municipalities present in 1916: 688 ###

dt <- dt %>% 
  mutate(prFrom10n = ifelse(year>=1919, 1, 
                            ifelse(is.na(pr) & year==1910, pr13, pr))) 



# Using only data for 1916 to 1919 and municipalities present in 1916 with controls
m.1c <- lm_robust(as.formula(paste0(dv," ~ prFrom10n +",
                                     paste0(v.ctrls,collapse="+"))),
                   data=dt,
                   subset=presentIn1916==1 & year %in% 1916:1919,
                   clusters=muni,
                   fixed_effects=~muni + year,
                   se_type="stata")

v.mean.nc <- dt %>% 
  slice(as.numeric(attr(m.1c[["fitted.values"]],"names"))) %>% 
  filter(year>=1919 & pr16==0) %>%
  summarize(Mean(relTurnoutW)) %>% 
  pull()



#####################################
### Model 4 Table G.1 (p. 7 - SI) ###
#####################################

# Creating leads for switch to PR
setkeyv(dt.a, c("year","muni"))

dt.a[, f1.prFrom10 := data.table::shift(prFrom10, type="lead", n=1, fill=1, 
                                        give.names=FALSE), by=muni]
dt.a[, f2.prFrom10 := data.table::shift(prFrom10, type="lead", n=2, fill=1, 
                                        give.names=FALSE), by=muni]

v.leads <- paste0("f",1:2,".prFrom10")


# Model with leads and all covariates
m.1d <- lm_robust(as.formula(paste0("relTurnoutW ~ prFrom10 +",
                                         paste0(c(v.leads,
                                                  v.ctrls.pol,
                                                  v.ctrls.other),collapse="+"))),
                       data=dt.a,
                       clusters=muni,
                       fixed_effects=~muni + year,
                       se_type="stata")

v.mean.nd <- dt %>% 
  slice(as.numeric(attr(m.1d[["fitted.values"]],"names"))) %>% 
  filter(year>=1919 & pr16==0) %>%
  summarize(Mean(turnoutGap)) %>% 
  pull()

#### This chunk creates Table with the models replicated above

# Put all the models in the list
ls.m <- mget(ls(pattern="^m\\.1"))



# Use the get.regtable function to create
# regression output table


dt.reg <- get.regtable(ls.m,
                       var.names = c("PR reform (1919)", "PR (lead t+1)","PR (lead t+2)"),
                       keep.vars = c("pr", "f1.prFrom10", "f2.prFrom10"),
                       digits.b.0=3,
                       weights=FALSE,
                       checkmark=c("Yes",""),
                       dv.mean=as.list(c(rep(dt.a[, Mean(relTurnoutW[year>=1919 & pr16==0])],2),v.mean.nc, v.mean.nd)),
                       dv.mean.name="Mean $Y_{treated, t \\geq 1919}$",
                       effects="se",
                       model.labs=rep(c("Model 4, Table 1","Model 6, Table 1","Model 6, Table E.1","Model 4, Table G.1"),1))

# Add description of covariates included
dt.reg[dt.reg$term %in% "Covariates",] <- c("Covariates", "All covars./No time trends","All covars. and time trends","All covars.","All covars. and Leads")


options(knitr.kable.NA = '')

kable.out <- dt.reg[-c(1,12:14),] %>%
  kable(caption="OLS regression results reporting the estimated effect of PR on gender equality in voting, 1910-1928",
        booktabs=TRUE,longtable=FALSE,format="html",
        align=c("l",rep("c",ncol(dt.reg)-1)),
        escape=FALSE,
        linesep="",
        row.names=FALSE,
        col.names=dt.reg[1,]) %>%
  kable_styling(font_size = 10,
                position = "center",
                full_width = FALSE) %>%
  row_spec(2, extra_latex_after="\\midrule") %>%
  row_spec(5, extra_latex_after="\\midrule") %>%
  add_header_above(c(" " = 1,
                     "Fraction female voters" = 4
  )) %>%
  footnote(general='Standard errors clustered by municipality are in parentheses. Model 4, Table 1 reports the model in the author´s main analysis including all the demographic and political covariates but no time trends; Model 6, Table 1 reports the same model including the linear and squared time trends (time trends are municipality specific); Model 6, Table E.1, reports the robustness check presented by the author in the SI where the full sample of  municipalities is used but time is restricted to the 1916 and 1919 elections (as in a more traditional Diff-in-Diff setting) using the full set of demographic and political covariates; Model 4, Table G.1 reports the Model 4 from the main analysis (All covars./No time trends) with leads of the 1919 PR treatment. The demographic and political covariates  included are: population (log), the female fraction of the population, eligible voters (log), the female fraction of eligible voters, and representatives in the municipal council (log), percentage of the population in nonconformist (Dissenting) religious societies and variables for the percentage of the working population in each of the employment categories industry, shipping, services, and agriculture (with agriculture as the omitted category). The effect size is the percentage increase from the counterfactual outcome: $\\\\hat{\\\\gamma}_{pr}/(\\\\bar{Y}_{treated, t \\\\geq 1919} - \\\\hat{\\\\gamma}_{pr}) \\\\times 100$, where $\\\\hat{\\\\gamma}_{pr}$ is the estimated PR coefficient and  $\\\\bar{Y}_{treated, t \\\\geq 1919}$ is the mean outcome in year $\\\\geq$ 1919 for the treated municipalities.',
           general_title="Note:",
           footnote_as_chunk=TRUE,
           threeparttable=TRUE,
           escape=FALSE)

kable.out

if(!dir.exists("tables")) dir.create("tables")
save_kable(kable.out, file=here("tables","table_replication_original.html"))

setDT(dt.reg)


